package constructdata;

import variableprocessing.GloveVocab;
import variableprocessing.StopWords;

import java.io.File;
import java.io.FileReader;
import java.io.*;
import java.util.Scanner;
import java.util.*;
import java.util.logging.Logger;

/**
 * Construct Training Data - Version_3.0
 *
 * <p>Command-line driver that reads a list of source-file paths (one per line)
 * and, for every path, asks {@code ConstructData} to append its output to a
 * fixed set of text files located in {@code <output>/batch<i>/batch<i>.<j>/}.
 * The batch indices {@code i} and {@code j} are derived from the 1-based
 * position of the path in the list.
 *
 * Created by wangxin on 23/11/2017.
 */
public class ConstructDataMain3 {

    static int subBatches = 5; // The number of sub batches in a batch
    static int subBatchSize = 1000000000;// The size of a sub batch

    /** File list used when no first command-line argument is supplied. */
    private static final String DEFAULT_FILE_LIST =
            "/Users/lingxiaoxia/Desktop/graphtestset/log4jTestCase/testCaseTrace.txt";

    /** Output root used when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/lingxiaoxia/Desktop/CodeTree/";

    /** Location of the JDK class-name list, relative to the working directory. */
    private static final String JDK_CLASS_LIST =
            "/Extractor/src/main/java/constructdata/configs/JDKCLASS.txt";

    /** Input files whose size in whole KiB exceeds this value are not processed. */
    private static final long MAX_SOURCE_FILE_KB = 200;

    /** How often opening the full writer set is attempted before a file is skipped. */
    private static final int MAX_WRITER_ATTEMPTS = 3;

    /**
     * Names of the per-sub-batch output files. The order matches the order in
     * which the writers are handed to {@code ConstructData.constructData}.
     */
    private static final String[] OUTPUT_FILE_NAMES = {
        "trainingTree.txt",
        "trainingPrediction.txt",
        "trainingClassPrediction.txt",
        "generationNode.txt",
        "treeSentence.txt",
        "jarPrediction.txt",
        "holesize.txt",
        "trace.txt",
        "blockPredictions.txt",
        "trainOriginalStatements.txt",
        "trainVariableNames.txt",
        "lines.txt"
    };

    /**
     * Runs the construction over every path listed in the file list.
     *
     * @param args optional overrides: {@code args[0]} is the file that lists
     *             the input paths, {@code args[1]} is the output root
     *             directory; missing arguments fall back to the built-in
     *             defaults
     * @throws Exception if the file list cannot be opened, an output writer
     *                   fails to close, or the final summary cannot be written
     */
    public static void main(String[] args) throws Exception {
        long startTime = System.currentTimeMillis();
        String globalPath = System.getProperty("user.dir");

        String filePaths = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_LIST;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
        if (!outputPath.endsWith("/")) {
            // Every output path below is built by plain concatenation.
            outputPath = outputPath + "/";
        }

        ConstructData test = new ConstructData();
        List<String> gloveVocabList = new GloveVocab().getGloveList();
        List<String> stopWordsList = new StopWords().getStopWordsList();
        List<String> jdkList = readJdkClassNames(globalPath);

        long cnt = 0;
        // Construct data based on the files
        try (Scanner scanner = new Scanner(new FileReader(filePaths))) {
            while (scanner.hasNextLine()) {
                cnt++;
                String filePath = scanner.nextLine();
                long batchi = getBatch(cnt);
                long batchj = getSubBatch(batchi, cnt);
                if (!createDir(outputPath, batchi, batchj)) {
                    System.err.println("Skipping " + filePath
                            + ": cannot create directory for batch" + batchi + "." + batchj);
                    continue;
                }
                System.out.println(cnt + ".: " + filePath + " -> batch" + batchi + "." + batchj);

                String batchDir = outputPath + "batch" + batchi + "/batch" + batchi + "." + batchj + "/";
                FileWriter[] writers;
                try {
                    writers = openWriters(batchDir);
                } catch (IOException e) {
                    System.err.println("Skipping " + filePath + ": " + e.getMessage());
                    continue;
                }

                // Construct Training data
                try {
                    File file = new File(filePath);
                    if (file.length() / 1024 <= MAX_SOURCE_FILE_KB) {
                        test.constructData((int) cnt, filePath, true, jdkList,
                                writers[0],   // trainingTree.txt
                                writers[1],   // trainingPrediction.txt
                                writers[2],   // trainingClassPrediction.txt
                                writers[3],   // generationNode.txt
                                writers[4],   // treeSentence.txt
                                writers[5],   // jarPrediction.txt
                                writers[6],   // holesize.txt
                                writers[7],   // trace.txt
                                writers[8],   // blockPredictions.txt
                                writers[9],   // trainOriginalStatements.txt
                                writers[10],  // trainVariableNames.txt
                                writers[11],  // lines.txt
                                true, globalPath,
                                gloveVocabList, stopWordsList);
                    }
                } catch (Exception e) {
                    // One bad input must not abort the run, but it should not vanish silently either.
                    System.err.println("Failed to construct data for " + filePath + ": " + e);
                } finally {
                    closeAll(writers);
                }
            }
        }

        try (FileWriter out = new FileWriter(outputPath + "lines.txt", true)) {
            out.write(test.linesCount + "\r\n");
            out.write(test.test + "\r\n");
        }

        System.out.println("---");
        System.out.println("cnt:" + cnt);
        System.out.println("End of construct main.");
        long endTime = System.currentTimeMillis();
        String time = formatTime(endTime - startTime);
        System.out.println("total time: " + time);
        System.err.println(test.linesCount);
        System.err.println(test.test);
    }

    /**
     * Reads the JDK class-name list, one entry per line.
     *
     * @param globalPath working directory the list location is relative to
     * @return the lines read; empty when the list cannot be opened (the
     *         failure is printed and the run continues)
     */
    private static List<String> readJdkClassNames(String globalPath) {
        List<String> jdkList = new ArrayList<>();
        File fileClassNameMap = new File(globalPath + JDK_CLASS_LIST);
        try (Scanner lines = new Scanner(new FileInputStream(fileClassNameMap))) {
            while (lines.hasNextLine()) {
                jdkList.add(lines.nextLine());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return jdkList;
    }

    /**
     * Opens one appending writer per entry of {@link #OUTPUT_FILE_NAMES}.
     * A failed attempt closes whatever it had opened before retrying, so no
     * file handles are leaked.
     *
     * @param batchDir directory of the sub batch, ending with {@code /}
     * @return the writers, in {@link #OUTPUT_FILE_NAMES} order, none of them null
     * @throws IOException if the full set could not be opened within
     *                     {@link #MAX_WRITER_ATTEMPTS} attempts
     */
    private static FileWriter[] openWriters(String batchDir) throws IOException {
        for (int attempt = 1; attempt <= MAX_WRITER_ATTEMPTS; attempt++) {
            FileWriter[] writers = new FileWriter[OUTPUT_FILE_NAMES.length];
            boolean complete = true;
            for (int i = 0; i < writers.length && complete; i++) {
                writers[i] = createFileWriter(batchDir + OUTPUT_FILE_NAMES[i]);
                complete = writers[i] != null;
            }
            if (complete) {
                return writers;
            }
            try {
                closeAll(writers);
            } catch (IOException ignored) {
                // Nothing was written through these writers; a close failure loses no data.
            }
        }
        throw new IOException("cannot open output files in " + batchDir
                + " after " + MAX_WRITER_ATTEMPTS + " attempts");
    }

    /**
     * Closes every non-null writer, even when an earlier one fails to close.
     *
     * @param writers writers to close; null entries are ignored
     * @throws IOException the first close failure, with later ones attached as suppressed
     */
    private static void closeAll(FileWriter[] writers) throws IOException {
        IOException firstFailure = null;
        for (FileWriter writer : writers) {
            if (writer == null) {
                continue;
            }
            try {
                writer.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Creates {@code outputPath/batch<i>/batch<i>.<j>/} (including missing
     * parent directories) if it does not exist yet.
     *
     * @return {@code true} when the directory exists after the call
     */
    private static boolean createDir(String outputPath, long batchi, long batchj) {
        File batchijDir = new File(outputPath + "/batch" + batchi + "/batch" + batchi + "." + batchj);
        // The second isDirectory() covers the case where mkdirs() returns false
        // because the directory appeared between the check and the call.
        return batchijDir.isDirectory() || batchijDir.mkdirs() || batchijDir.isDirectory();
    }

    /** Number of inputs in one batch; computed in long because the default product exceeds int range. */
    private static long batchCapacity() {
        return (long) subBatches * subBatchSize;
    }

    /**
     * Returns the 1-based batch index for the input at position {@code cnt}.
     */
    private static long getBatch(long cnt) {
        return cnt / batchCapacity() + 1;
    }

    /**
     * Returns the 1-based sub-batch index, in {@code 1..subBatches}, of the
     * input at position {@code cnt} within batch {@code batchi}.
     */
    private static long getSubBatch(long batchi, long cnt) {
        long batchStart = batchCapacity() * (batchi - 1);
        long numerator = cnt >= batchStart ? cnt - batchStart : cnt;
        return (numerator / subBatchSize) % subBatches + 1;
    }

    /**
     * Opens {@code filePath} for appending, creating the file when necessary.
     *
     * @param filePath path of the file to append to
     * @return the writer, or {@code null} when the file cannot be created or
     *         opened (the failure is printed)
     */
    public static FileWriter createFileWriter(String filePath) {
        File file = new File(filePath);
        try {
            if (!file.exists()) {
                file.createNewFile();
            }
            return new FileWriter(filePath, true);
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Renders a duration as a compact string such as {@code 1d2h3m4s5ms}.
     * Components that are zero are left out; a duration with no positive
     * component (zero or negative input) is rendered as {@code 0ms}.
     *
     * @param ms duration in milliseconds; must not be {@code null}
     * @return the formatted duration, never empty
     */
    public static String formatTime(Long ms) {
        final long ss = 1000L;
        final long mi = ss * 60;
        final long hh = mi * 60;
        final long dd = hh * 24;

        long remaining = ms;
        long day = remaining / dd;
        remaining -= day * dd;
        long hour = remaining / hh;
        remaining -= hour * hh;
        long minute = remaining / mi;
        remaining -= minute * mi;
        long second = remaining / ss;
        long milliSecond = remaining - second * ss;

        StringBuilder sb = new StringBuilder();
        if (day > 0) {
            sb.append(day).append("d");
        }
        if (hour > 0) {
            sb.append(hour).append("h");
        }
        if (minute > 0) {
            sb.append(minute).append("m");
        }
        if (second > 0) {
            sb.append(second).append("s");
        }
        if (milliSecond > 0) {
            sb.append(milliSecond).append("ms");
        }
        return sb.length() == 0 ? "0ms" : sb.toString();
    }

}
